#coding: utf-8

# Common query prefix shared by both listing URLs.
_SSE_STOCK_QUERY = ('http://www.sse.com.cn/sseportal/webapp/datapresent/'
                    'SSEQueryStockInfoAct?reportName=BizCompStockInfoRpt')

# URL templates; the single %s placeholder receives the CURSOR value.
# The script requests 17 pages through sh_a and 2 pages through sh_b.
sh_a = _SSE_STOCK_QUERY + '&CURSOR=%s'
# Same report with tab_flg=2 (presumably a different share class -- confirm).
sh_b = _SSE_STOCK_QUERY + '&tab_flg=2&CURSOR=%s'


import urllib
from BeautifulSoup import BeautifulSoup
import threading
import time

# Wall-clock timestamp taken at startup; the elapsed time is printed at the
# end of the script.
start = time.time()

# Number of table rows processed so far, summed over all worker threads.
total = 0

# Synchronisation object held by getpage while it updates `total`.
c = threading.Condition()
def _next_row(node):
    """Return the node two siblings after `node`, or None if there is none.

    The traversal deliberately skips one sibling between rows (presumably the
    whitespace text node between <tr> elements -- confirm against the page).
    Unlike a bare `node.nextSibling.nextSibling`, this does not raise when
    `node` is the very last child of its parent.
    """
    sibling = node.nextSibling
    if sibling is None:
        return None
    return sibling.nextSibling


def getpage(url):
    """Download one listing page and print two cells of every data row.

    The page at `url` is parsed with BeautifulSoup and the table having
    width="100%" and bgcolor="#337fb2" is located.  Its first row is skipped;
    for each following row the text of the link in the first cell and the text
    of the node two siblings after that cell are printed, GBK-encoded and
    separated by three spaces.  The module-level counter `total` is
    incremented once per printed row while holding `c`.

    Any exception raised by the download or by an unexpected page layout is
    propagated unchanged.
    """
    global total
    response = urllib.urlopen(url)
    try:
        page = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()
    soup = BeautifulSoup(page)
    table = soup.find('table', {'width': "100%", 'bgcolor': '#337fb2'})
    tr = _next_row(table.tr)
    while tr:
        first_cell = tr.td
        line = '%s   %s' % (first_cell.a.string.encode('gbk'),
                            first_cell.nextSibling.nextSibling.string.encode('gbk'))
        # Print and count under the same lock so that lines emitted by
        # concurrent threads are not interleaved and the counter always
        # matches the number of rows actually printed.
        with c:
            print(line)
            total = total + 1
        tr = _next_row(tr)


# One URL per page: 17 pages through sh_a, whose cursor starts at 1 and
# advances by 50 per page, followed by two pages through sh_b.
urls = [sh_a % (50 * page + 1) for page in xrange(17)]
urls.extend([sh_b % 1, sh_b % 51])

# One worker thread per page.
threads = [threading.Thread(target=getpage, args=[url]) for url in urls]

# Status message, re-encoded from the source file's UTF-8 to GBK.
print('解析中...'.decode('utf-8').encode('gbk'))

for worker in threads:
    worker.start()
# Wait for every page to finish before reporting.
for worker in threads:
    worker.join()

# Elapsed seconds, then the number of rows counted by the workers.
print(time.time() - start)
print(total)
